# Who Cares if You Vote? Partisan agreement and injunctive norms of voting
# Edward Fieldhouse (1), David Cutts (2), Jack Bailey (1)
# (1) The University of Manchester, (2) The University of Birmingham


# 1. Housekeeping ---------------------------------------------------------

# The following code requires that you have first opened the associated
# project file. If not, click the button on the top right and open it
# ("Open Project...").

# Note also that the Bayesian methods we use below rely on having installed
# the probabilistic programming language Stan. If you have yet to install
# Stan, please see https://mc-stan.org/users/interfaces/.

# Load packages

library(tidyverse)
library(tidybayes)
library(magrittr)
library(jbmisc)   # devtools::install_github("jackobailey/jbmisc)
library(haven)
library(brms)
library(here)


# Load data from the British Election Study Internet Panel. Note: If you
# plan to use this data, please make sure that you download the most recent
# version available at https://www.britishelectionstudy.com.

dta <- read_sav(here("_data", "BES2015_W4_v4.0.sav.zip"))



# 2. Transform data -------------------------------------------------------

# Select variables

dta <-
  dta %>%
  select(
    # Respondent-level covariates
    wt,
    age = Age,
    female = gender,
    edu = edlevel,
    edu2 = education,
    married = marital,
    discuss = discussPolDays,
    att = polAttention,
    no_und = efficacyNotUnderstand,
    pol_care = efficacyPolCare,
    duty = dutyToVote2,
    pid = partyId,
    sq = partyIdSqueeze,
    str = partyIdStrength,
    turn = profile_turnout_2015,
    
    # Discussant-level covariates
    rel = num_range("relationshipName", 1:3),
    app = num_range("discussantApprovalVoteName", 1:3),
    vote = num_range("discussantVoteName", 1:3),
    pol = num_range("discussPolDaysD", 1:3)
  )


# First, we'll deal with all of the respondent-level variables. To get started
# we'll start with age, which we're going to convert to z-scores to help the
# Bayesian models we fit later converge.

dta$age <-
  dta$age %>% 
  mark_na(-8:-9) %>% 
  as.numeric() %>% 
  rescale(type = "z")


# Next, we'll convert the "female" variable to a factor that takes the value 1
# where the respondent is female and 0 where they are male.

dta$female <-
  dta$female %>%
  as_factor()


# We now need to deal with our two education variables. We need "edu2" only as
# it contains information on who has some other qualification and whose are not
# known. We'll add these categories to "edu1", then populate them from "edu2".

dta$edu <-
  dta$edu %>%
  as_factor() %>%
  factor(levels = c("No qualifications",
                    "Below GCSE",
                    "GCSE",
                    "A-level",
                    "Undergraduate",
                    "Postgrad",
                    "Other",
                    "Unknown"),
         labels = c("None",
                    "Below GCSE",
                    "GCSE",
                    "A-level",
                    "Undergraduate",
                    "Postgraduate",
                    "Other",
                    "Unknown"))

dta$edu[dta$edu2 == 18] <- "Other"
dta$edu[is.na(dta$edu) == T] <- "Unknown"


# Now we'll move on to marital status. We want to convert this to a new variable
# that tells us if the respondent is cohabiting. This is because we expect similar
# "marriage" effects for those who are married, living as married, or in civil
# partnerships.

dta$cohabit <-
  dta$married %>%
  as_factor() %>%
  as_dummy(c("Married", "Living as married", "Civil Partnership")) %>%
  factor(labels = c("Other",
                    "Cohabiting"))


# There are now a host of attitudinal variables that we must deal with. These each
# have roughly the same issues. And, in most cases, we can simply mark "Don't know"
# cases as missing, convert them to numeric, and include them in our model. We'll
# do them all at once.

dta$discuss <-
  dta$discuss %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>% 
  subtract(1)


dta$att <-
  dta$att %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>% 
  subtract(1)


dta$no_und <-
  dta$no_und %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>%
  subtract(1)


dta$pol_care <-
  dta$pol_care %>%
  as_factor() %>% 
  mark_na("Don't know") %>%
  as.numeric() %>%
  subtract(1)


dta$duty <- 
  dta$duty %>% 
  as_factor() %>% 
  mark_na("Don't know") %>% 
  as.numeric() %>% 
  subtract(1)


# Now we have to convert the turnout variable to a dummy that takes the value 1
# where the respondent said that they voted and 0 otherwise.

dta$turn <-
  dta$turn %>%
  as_factor() %>% 
  as_dummy("Yes, voted") %>%
  factor(labels = c("Didn't vote", "Voted"))


# Next we have to create a party identification variable that combines the two
# pid variables in our data. First, we use the responses from the squeeze variable
# "sq" to populate our main pid variable where respondents said that they identified
# with no party (10) or did not know (9999). We then convert the resulting variable
# to a factor so that we can include it in our model.

dta$pid[dta$pid %in% c(10, 9999)] <- dta$sq[dta$pid %in% c(10, 9999)]

# Party ID
dta$pid <-
  dta$pid %>%
  as_factor() %>%
  droplevels() %>% 
  factor(labels = c("Conservative",
                    "Labour",
                    "Liberal Democrat",
                    "SNP",
                    "Plaid Cymru",
                    "UKIP",
                    "Green Party",
                    "BNP",
                    "Other",
                    "None",
                    "Don't know"))


# Finally for our respondent-level data, we need to recode the party identification strength
# variable to include those respondents with no party identification, whichb we'll also set
# as the reference category. We'll also mark any cases where the respondent says that they
# "Don't know" how strongly they identified as NA.

dta$str <- 
  dta$str %>% 
  as_factor() %>% 
  mark_na("Don't know") %>% 
  fct_explicit_na("None") %>% 
  fct_relevel("None")



# Next, we must move on to the discussant-level covariates. These are tricky, so the code might
# get quite long below. First, we'll start by converting the dyadic relationship variables to
# factors using the lapply() function.

dta[, paste0("rel", 1:3)] <-
  dta[, paste0("rel", 1:3)] %>%
  lapply(as_factor)


# Now we'll convert the discussants' voting variable so that it takes the same values as the
# respondent party identification variable.

dta[, paste0("vote", 1:3)] <-
  dta[, paste0("vote", 1:3)] %>%
  lapply(as_factor) %>%
  lapply(droplevels) %>% 
  lapply(factor, labels = c("Labour",
                            "Conservative",
                            "Liberal Democrat",
                            "SNP",
                            "Plaid Cymru",
                            "Green Party",
                            "UKIP",
                            "BNP",
                            "Other",
                            "None",
                            "Don't know"))


# Next, we need to create a count of discussants for each respondent. We can do this by adding
# up how many NAs the respondent has across the 3 "rel" columns, then subtracting the answer
# from 3 -- the maximum number of discussants.
dta$n_disc <- 3 - rowSums(is.na(dta[, paste0("rel", 1:3)]) == T)


# Convert to data.frame
dta <- dta %>% data.frame()


# One complicating factor is that some respondents report fewer than 3 discussants, but do not
# necessary put those that they do report in the first slot. Sometimes, for example, respondents
# with 2 discussants will use the 2nd and 3rd slot, or the 1st and 3rd slot. We need to deal with
# this and make sure that where the respondent has fewer than 3 discussants, they always appear in
# the respective column. E.g. a respondent with 1 discussant will have responses in only column 1
# and a respondent with 2 discussants will have responses in only columns 1 and 2. As you can see
# from the output below this is not currently the case. This takes the form of a lot of recoding.

dta[, paste0("rel", 1:3)][dta$n_disc == 2 & is.na(dta$rel1) == T, ] <-
  tibble(rel1 = dta$rel2[dta$n_disc == 2 & is.na(dta$rel1) == T],
         rel2 = dta$rel3[dta$n_disc == 2 & is.na(dta$rel1) == T],
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 2 & is.na(dta$vote1) == T, ] <-
  tibble(vote1 = dta$vote2[dta$n_disc == 2 & is.na(dta$vote1) == T],
         vote2 = dta$vote3[dta$n_disc == 2 & is.na(dta$vote1) == T],
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 2 & is.na(dta$app1) == T, ] <-
  tibble(app1 = dta$app2[dta$n_disc == 2 & is.na(dta$app1) == T],
         app2 = dta$app3[dta$n_disc == 2 & is.na(dta$app1) == T],
         app3 = NA)

dta[, paste0("rel", 1:3)][dta$n_disc == 2 & is.na(dta$rel2) == T, ] <-
  tibble(rel1 = dta$rel1[dta$n_disc == 2 & is.na(dta$rel2) == T],
         rel2 = dta$rel3[dta$n_disc == 2 & is.na(dta$rel2) == T],
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 2 & is.na(dta$vote2) == T, ] <-
  tibble(vote1 = dta$vote1[dta$n_disc == 2 & is.na(dta$vote2) == T],
         vote2 = dta$vote3[dta$n_disc == 2 & is.na(dta$vote2) == T],
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 2 & is.na(dta$app2) == T, ] <-
  tibble(app1 = dta$app1[dta$n_disc == 2 & is.na(dta$app2) == T],
         app2 = dta$app3[dta$n_disc == 2 & is.na(dta$app2) == T],
         app3 = NA)

dta[, paste0("rel", 1:3)][dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel2) == T, ] <-
  tibble(rel1 = dta$rel3[dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel2) == T],
         rel2 = NA,
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote2) == T, ] <-
  tibble(vote1 = dta$vote3[dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote2) == T],
         vote2 = NA,
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app2) == T, ] <-
  tibble(app1 = dta$app3[dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app2) == T],
         app2 = NA,
         app3 = NA)

dta[, paste0("rel", 1:3)][dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel3) == T, ] <-
  tibble(rel1 = dta$rel2[dta$n_disc == 1 & is.na(dta$rel1) == T & is.na(dta$rel3) == T],
         rel2 = NA,
         rel3 = NA)

dta[, paste0("vote", 1:3)][dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote3) == T, ] <-
  tibble(vote1 = dta$vote2[dta$n_disc == 1 & is.na(dta$vote1) == T & is.na(dta$vote3) == T],
         vote2 = NA,
         vote3 = NA)

dta[, paste0("app", 1:3)][dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app3) == T, ] <-
  tibble(app1 = dta$app2[dta$n_disc == 1 & is.na(dta$app1) == T & is.na(dta$app3) == T],
         app2 = NA,
         app3 = NA)


# Because of the way that the multimembership models work, we cannot have any empty columns.
# This is difficult where respondents have only 1 or 2 discussants. Fortunately, this type
# of model does include weights for the multimembership structure. As such, where we have
# missing data, we can repeat discussants across multiple columns but weight them to sum to
# only a single individual. For example, where a respondent has 3 discussants, each column
# has a value and each discussant receives a weight of 1. Where a respondent has only 2
# discussants, we repeat the second instance across the third column then assign each column
# weights of 1, .5, and .5, which sum to 2 -- the number of discussants.

dta[, c("rel3", "vote3", "app3")][dta$n_disc == 2, ] <- 
  dta[, c("rel2", "vote2", "app2")][dta$n_disc == 2, ]

dta[, c("rel2", "vote2", "app2")][dta$n_disc == 1, ] <-
  dta[, c("rel1", "vote1", "app1")][dta$n_disc == 1, ]

dta[, c("rel3", "vote3", "app3")][dta$n_disc == 1, ] <-
  dta[, c("rel1", "vote1", "app1")][dta$n_disc == 1, ]


# Now we'll create the weighting variables. As well as count *how many* discussants each
# respondent has, these also account for how often they discussed politics with them. We
# use the "pol" variables to do this, then create the weights where respondents have 3,
# 2, and 1 discussants.

dta[, paste0("w", 1:3)] <- NA

dta[, paste0("pol", 1:3)] <- 
  dta[, paste0("pol", 1:3)] %>% 
  lapply(as_factor) %>% 
  lapply(mark_na, "Don't know") %>% 
  lapply(as.numeric) %>% 
  lapply(subtract, 1) %>% 
  lapply(divide_by, 7)

dta[, paste0("w", 1:3)][dta$n_disc == 3, ] <- 
  dta[, paste0("pol", 1:3)][dta$n_disc == 3, ]

dta[, paste0("w", 1:3)][dta$n_disc == 2, ] <- 
  tibble(w1 = dta$pol1[dta$n_disc == 2],
         w2 = dta$pol2[dta$n_disc == 2]/2,
         w3 = dta$pol2[dta$n_disc == 2]/2)

dta[, paste0("w", 1:3)][dta$n_disc == 1, ] <-
  tibble(w1 = dta$pol1[dta$n_disc == 1]/3,
         w2 = dta$pol1[dta$n_disc == 1]/3,
         w3 = dta$pol1[dta$n_disc == 1]/3)


# Finally, we need to recode our main independent variable -- discussant approval.
# This is a little bit tricky too. First, we'll recode these variables into dummy
# variables that take the value 1 where the respondent said that the discussant
# would care if they voted and 0 otherwise. We will include the original "app"
# variables at the 2nd level, but need to create a new variable to capture the
# main effect at the individual level while accounting for the number of discussants
# that each respondent has. We do this by calculating the weighted average of the
# "app" variables.

dta[, paste0("app", 1:3)] <-
  dta[, paste0("app", 1:3)] %>%
  lapply(as_factor) %>% 
  lapply(as_dummy, "Yes")

dta <-
  dta %>% 
  mutate(
    app_fix1 = app1*w1,
    app_fix2 = app2*w2,
    app_fix3 = app3*w3
  )

dta$app_fix <- rowMeans(dta[, paste0("app_fix", 1:3)])


# Another tricky bit, we need to create new variables that tell us about the nature of
# each respondents-discussant dyad's partisanship. We can do this using the case_when()
# function.

opt <- c("None", "Don't know")

dta <-
  dta %>%
  mutate(
    prt1 = 
      case_when(
        pid == vote1 & !(pid %in% opt) & !(vote1 %in% opt) & pid != "Other" & vote1 != "Other" & is.na(rel1) == F ~ "Shared Partisanship",
        pid != vote1 & !(pid %in% opt) & !(vote1 %in% opt) & is.na(rel1) == F ~ "Opposing Partisanship",
        pid == "Other" & vote1 == "Other" & is.na(rel1) == F ~ "Opposing Partisanship",
        pid %in% opt & !(vote1 %in% opt) & is.na(rel1) == F ~ "Respondent Non-partisan",
        vote1 %in% opt & !(pid %in% opt) & is.na(rel1) == F ~ "Discussant Non-partisan",
        pid %in% opt & vote1 %in% opt & is.na(rel1) == F ~ "Shared Non-partisan"),
    prt2 = 
      case_when(
        pid == vote2 & !(pid %in% opt) & !(vote2 %in% opt) & pid != "Other" & vote2 != "Other" & is.na(rel2) == F ~ "Shared Partisanship",
        pid != vote2 & !(pid %in% opt) & !(vote2 %in% opt) & is.na(rel2) == F ~ "Opposing Partisanship",
        pid == "Other" & vote2 == "Other" & is.na(rel2) == F ~ "Opposing Partisanship",
        pid %in% opt & !(vote2 %in% opt) & is.na(rel2) == F ~ "Respondent Non-partisan",
        vote2 %in% opt & !(pid %in% opt) & is.na(rel2) == F ~ "Discussant Non-partisan",
        pid %in% opt & vote2 %in% opt & is.na(rel2) == F ~ "Shared Non-partisan"),
    prt3 =
      case_when(
        pid == vote3 & !(pid %in% opt) & !(vote3 %in% opt) & pid != "Other" & vote3 != "Other" & is.na(rel3) == F ~ "Shared Partisanship",
        pid != vote3 & !(pid %in% opt) & !(vote3 %in% opt) & is.na(rel3) == F ~ "Opposing Partisanship",
        pid == "Other" & vote3 == "Other" & is.na(rel3) == F ~ "Opposing Partisanship",
        pid %in% opt & !(vote3 %in% opt) & is.na(rel3) == F ~ "Respondent Non-partisan",
        vote3 %in% opt & !(pid %in% opt) & is.na(rel3) == F ~ "Discussant Non-partisan",
        pid %in% opt & vote3 %in% opt & is.na(rel3) == F ~ "Shared Non-partisan")
  )


# Finally, we'll select only the variables we need for our analysis, then remove any missing
# data using listwise deletion.

dta <-
  dta %>%
  select(wt,
         age,
         female,
         edu,
         cohabit,
         discuss,
         att,
         no_und,
         pol_care,
         duty,
         str,
         turn,
         app_fix,
         num_range("rel", 1:3),
         num_range("app", 1:3),
         num_range("prt", 1:3),
         num_range("w", 1:3)) %>% 
  na.omit()


# 3. Fit models -----------------------------------------------------------

# First, we'll fit the turnout model without civic duty

m1 <- 
  brm(
    formula =
      turn | weights(wt) ~ 1 + age + female + edu + cohabit + discuss + att + no_und + pol_care + str + app_fix +
      (1 + mmc(app1, app2, app3) | mm(prt1, prt2, prt3, weights = cbind(w1, w2, w3), scale = F)),
    family =
      bernoulli(link = "logit"),
    prior = 
      prior(normal(0, 1.5), class = "Intercept") +
      prior(normal(0, .1), class = "b") +
      prior(exponential(2), class = "sd") +
      prior(lkj(2), class = "cor"),
    data = dta,
    iter = 2000,
    chains = 4,
    cores = 4,
    refresh = 1,
    control = list(adapt_delta = .99),
    file = here("_output", "w4_m1")
  )

summary(m1)
ranef(m1)


# Then, we'll fit the vote model *with* civic duty.

m2 <- 
  brm(
    formula =
      turn | weights(wt) ~ 1 + age + female + edu + cohabit + discuss + att + no_und + pol_care + duty + str + app_fix +
      (1 + mmc(app1, app2, app3) | mm(prt1, prt2, prt3, weights = cbind(w1, w2, w3), scale = F)),
    family =
      bernoulli(link = "logit"),
    prior = 
      prior(normal(0, 1.5), class = "Intercept") +
      prior(normal(0, .1), class = "b") +
      prior(exponential(2), class = "sd") +
      prior(lkj(2), class = "cor"),
    data = dta,
    iter = 2000,
    chains = 4,
    cores = 4,
    refresh = 1,
    control = list(adapt_delta = .99),
    file = here("_output", "w4_m2")
  )

summary(m2)
ranef(m2)


# Third, we'll fit the civic duty model.

m3 <- 
  brm(
    formula =
      duty | weights(wt) ~ 1 + age + female + edu + cohabit + discuss + att + no_und + pol_care + str + app_fix +
      (1 + mmc(app1, app2, app3) | mm(prt1, prt2, prt3, weights = cbind(w1, w2, w3), scale = F)),
    family =
      gaussian(),
    prior = 
      prior(normal(2, 1), class = "Intercept") +
      prior(normal(0, 1), class = "b") +
      prior(exponential(5), class = "sd") +
      prior(lkj(2), class = "cor"),
    data = dta,
    iter = 2000,
    chains = 4,
    cores = 4,
    refresh = 1,
    control = list(adapt_delta = .99),
    file = here("_output", "w4_m3")
  )

summary(m3)
ranef(m3)



# 4. Create predicted probability plots -----------------------------------

# We're going to create three different plots, one predicted probability plot
# for each model. These are a little complicated, as they involve creating new
# counterfactual datasets that hold all variables other than those we really
# care about at their mean or their reference categories.

# First, we'll compute the predicted probabilities for the first model.

probs <- data.frame(prt = NA,
                    prob_app1 = NA,
                    prob_app0 = NA)

for(i in unique(m1$data$prt1)){
  
  # Compute predicted probabilities where app == 1
  p_app1 <- fitted(m1,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 1,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 1,
                                        app2 = 1,
                                        app3 = 1,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Compute predicted probabilities where app == 0
  p_app0 <- fitted(m1,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 0,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 0,
                                        app2 = 0,
                                        app3 = 0,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Add to data frame
  p <- data.frame(prt = i,
                  prob_app1 = as.numeric(p_app1),
                  prob_app0 = as.numeric(p_app0))
  
  # Bind to previous predictions
  probs <- rbind(probs,
                 p)
  
}


# Next, we'll remove any rows that include NAs.

probs <-
  probs %>%
  na.omit()


# Next, we'll reorder the dyadic partisanship factor so that appears on the plot in
# the right order.

probs$prt <- 
  probs$prt %>% 
  factor(levels = c("Shared Non-partisan",
                    "Discussant Non-partisan",
                    "Respondent Non-partisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"),
         labels = c("Shared Nonpartisan",
                    "Discussant Nonpartisan",
                    "Respondent Nonpartisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"))


# Then, we'll compute contrasts for each row
probs$cont <- probs$prob_app1 - probs$prob_app0


# Next, we'll convert everything to long format so that we can plot the predicted
# probabilities and the contrasts on seperate facets.

probs <- 
  probs %>% 
  pivot_longer(
    cols = c(prob_app1, prob_app0, cont)
  ) %>% 
  mutate(
    panel =
      case_when(
        name %in% c("prob_app1", "prob_app0") ~ "Predicted Probability",
        TRUE ~ "Predicted Contrast"
      ) %>% 
      factor(
        levels = c("Predicted Probability", "Predicted Contrast")
      ),
    name =
      name %>% 
      factor(
        levels = c("prob_app0", "prob_app1", "cont"),
        labels = c("Do not approve", "Approve", "Contrast")
      )
  )


# We'll also summarise these data to provide some useful labels

prob_labs <- 
  probs %>% 
  group_by(prt, name, panel) %>% 
  summarise(value = median(value))


# Now that we have the necessary data, we can plot these results using ggplot()

plot1 <- 
  probs %>% 
  ggplot(
    aes(
      y = prt,
      x = value,
      slab_fill = name,
      slab_colour = name
    )
  ) +
  facet_wrap(~panel) +
  stat_halfeye(.width = .95,
                point_size = 1,
                interval_size = 0.5,
                slab_size = 0.5,
                height = .7,
                normalize = "groups") +
  stat_summary(geom = "point",
               fun = "median",
               colour = "white",
               size = 0.3) +
  geom_text(data = prob_labs,
            aes(label = scales::percent(value,
                                        accuracy = 0.1,
                                        suffix = ""),
                x = value + .002,
                y = prt),
            family = "Cabin",
            size = 2,
            vjust = 2,
            hjust = 0.7,
            inherit.aes = FALSE) +
  scale_y_discrete(labels = str_wrap(levels(probs$prt), width = 8)) +
  scale_x_continuous(breaks = seq(0, 1, by = .2),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(0, 1)) +
  labs(x = "Prob. of Voting",
       colour = "",
       fill = "") +
  scale_colour_manual(values = c("#D85A00", "#2368B2", "grey40"),
                      aesthetics = "slab_colour") +
  scale_fill_manual(values = c("#E28745", "#5F91C7", "grey60"),
                    aesthetics = "slab_fill") +
  theme_bailey() +
  theme(legend.position = "bottom",
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = rel(1)),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_text(hjust= .5))


# Now we can render the plot as a jpeg
jpeg(
  file = here("_output", "w4_plot1.jpeg"),
  res = 300,
  units = "in",
  width = 5.97,
  height = 3.69
)
plot1
dev.off()


# Next, we'll compute the predicted probabilities for the second model

probs <- data.frame(prt = NA,
                    prob_app1 = NA,
                    prob_app0 = NA)

for(i in unique(m2$data$prt1)){
  
  # Compute predicted probabilities where app == 1
  p_app1 <- fitted(m2,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        duty = mean(dta$duty),
                                        str = "None",
                                        app_fix = 1,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 1,
                                        app2 = 1,
                                        app3 = 1,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Compute predicted probabilities where app == 0
  p_app0 <- fitted(m2,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        duty = mean(dta$duty),
                                        str = "None",
                                        app_fix = 0,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 0,
                                        app2 = 0,
                                        app3 = 0,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Add to data frame
  p <- data.frame(prt = i,
                  prob_app1 = as.numeric(p_app1),
                  prob_app0 = as.numeric(p_app0))
  
  # Bind to previous predictions
  probs <- rbind(probs,
                 p)
  
}


# Next, we'll remove any rows that include NAs.

probs <-
  probs %>%
  na.omit()


# Next, we'll reorder the dyadic partisanship factor so that appears on the plot in
# the right order.

probs$prt <- 
  probs$prt %>% 
  factor(levels = c("Shared Non-partisan",
                    "Discussant Non-partisan",
                    "Respondent Non-partisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"),
         labels = c("Shared Nonpartisan",
                    "Discussant Nonpartisan",
                    "Respondent Nonpartisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"))


# Then, we'll compute contrasts for each row
probs$cont <- probs$prob_app1 - probs$prob_app0


# Next, we'll convert everything to long format so that we can plot the predicted
# probabilities and the contrasts on seperate facets.

probs <- 
  probs %>% 
  pivot_longer(
    cols = c(prob_app1, prob_app0, cont)
  ) %>% 
  mutate(
    panel =
      case_when(
        name %in% c("prob_app1", "prob_app0") ~ "Predicted Probability",
        TRUE ~ "Predicted Contrast"
      ) %>% 
      factor(
        levels = c("Predicted Probability", "Predicted Contrast")
      ),
    name =
      name %>% 
      factor(
        levels = c("prob_app0", "prob_app1", "cont"),
        labels = c("Do not approve", "Approve", "Contrast")
      )
  )


# We'll also summarise these data to provide some useful labels

prob_labs <- 
  probs %>% 
  group_by(prt, name, panel) %>% 
  summarise(value = median(value))


# Now that we have the necessary data, we can plot these results using ggplot()

plot2 <- 
  probs %>% 
  ggplot(
    aes(
      y = prt,
      x = value,
      slab_fill = name,
      slab_colour = name
    )
  ) +
  facet_wrap(~panel) +
  stat_halfeye(.width = .95,
                point_size = 1,
                interval_size = 0.5,
                slab_size = 0.5,
                height = .7,
                normalize = "groups") +
  stat_summary(geom = "point",
               fun = "median",
               colour = "white",
               size = 0.3) +
  geom_text(data = prob_labs,
            aes(label = scales::percent(value,
                                        accuracy = 0.1,
                                        suffix = ""),
                x = value + .002,
                y = prt),
            family = "Cabin",
            size = 2,
            vjust = 2,
            hjust = 0.7,
            inherit.aes = FALSE) +
  scale_y_discrete(labels = str_wrap(levels(probs$prt), width = 8)) +
  scale_x_continuous(breaks = seq(0, 1, by = .2),
                     labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(xlim = c(0, 1)) +
  labs(x = "Prob. of Voting",
       colour = "",
       fill = "") +
  scale_colour_manual(values = c("#D85A00", "#2368B2", "grey40"),
                      aesthetics = "slab_colour") +
  scale_fill_manual(values = c("#E28745", "#5F91C7", "grey60"),
                    aesthetics = "slab_fill") +
  theme_bailey() +
  theme(legend.position = "bottom",
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = rel(1)),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_text(hjust= .5))


# Now we can render the plot as a jpeg
jpeg(
  file = here("_output", "w4_plot2.jpeg"),
  res = 300,
  units = "in",
  width = 5.97,
  height = 3.69
)
plot2
dev.off()


# Finally, we'll do the same for the third model

probs <- data.frame(prt = NA,
                    prob_app1 = NA,
                    prob_app0 = NA)

for(i in unique(m1$data$prt1)){
  
  # Compute predicted probabilities where app == 1
  p_app1 <- fitted(m3,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 1,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 1,
                                        app2 = 1,
                                        app3 = 1,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Compute predicted probabilities where app == 0
  p_app0 <- fitted(m3,
                   summary = FALSE,
                   newdata = data.frame(age = 0,
                                        female = "Male",
                                        edu = "None",
                                        cohabit = "Other",
                                        discuss = mean(dta$discuss),
                                        att = mean(dta$att),
                                        no_und = mean(dta$no_und),
                                        pol_care = mean(dta$discuss),
                                        str = "None",
                                        app_fix = 0,
                                        prt1 = i,
                                        prt2 = i,
                                        prt3 = i,
                                        app1 = 0,
                                        app2 = 0,
                                        app3 = 0,
                                        w1 = 1/3,
                                        w2 = 1/3,
                                        w3 = 1/3))
  
  # Add to data frame
  p <- data.frame(prt = i,
                  prob_app1 = as.numeric(p_app1),
                  prob_app0 = as.numeric(p_app0))
  
  # Bind to previous predictions
  probs <- rbind(probs,
                 p)
  
}


# Next, we'll remove any rows that include NAs.

probs <-
  probs %>%
  na.omit()


# Next, we'll reorder the dyadic partisanship factor so that appears on the plot in
# the right order.

probs$prt <- 
  probs$prt %>% 
  factor(levels = c("Shared Non-partisan",
                    "Discussant Non-partisan",
                    "Respondent Non-partisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"),
         labels = c("Shared Nonpartisan",
                    "Discussant Nonpartisan",
                    "Respondent Nonpartisan",
                    "Opposing Partisanship",
                    "Shared Partisanship"))


# Then, we'll compute contrasts for each row
probs$cont <- probs$prob_app1 - probs$prob_app0


# Next, we'll convert everything to long format so that we can plot the predicted
# probabilities and the contrasts on seperate facets.

probs <- 
  probs %>% 
  pivot_longer(
    cols = c(prob_app1, prob_app0, cont)
  ) %>% 
  mutate(
    panel =
      case_when(
        name %in% c("prob_app1", "prob_app0") ~ "Predicted Outcome",
        TRUE ~ "Predicted Contrast"
      ) %>% 
      factor(
        levels = c("Predicted Outcome", "Predicted Contrast")
      ),
    name =
      name %>% 
      factor(
        levels = c("prob_app0", "prob_app1", "cont"),
        labels = c("Do not approve", "Approve", "Contrast")
      )
  )


# We'll also summarise these data to provide some useful labels

prob_labs <- 
  probs %>% 
  group_by(prt, name, panel) %>% 
  summarise(value = median(value))


# Now that we have the necessary data, we can plot these results using ggplot()

plot3 <- 
  probs %>% 
  ggplot(
    aes(
      y = prt,
      x = value,
      slab_fill = name,
      slab_colour = name
    )
  ) +
  facet_wrap(~panel) +
  stat_halfeye(.width = .95,
                point_size = 1,
                interval_size = 0.5,
                slab_size = 0.5,
                height = .7,
                normalize = "groups") +
  stat_summary(geom = "point",
               fun = "median",
               colour = "white",
               size = 0.3) +
  geom_text(data = prob_labs,
            aes(label = round(value, 1),
                x = value + .002,
                y = prt),
            family = "Cabin",
            size = 2,
            vjust = 2,
            hjust = 0.7,
            inherit.aes = FALSE) +
  scale_y_discrete(labels = str_wrap(levels(probs$prt), width = 8)) +
  scale_x_continuous(breaks = 0:4) +
  coord_cartesian(xlim = c(0, 4)) +
  labs(x = 'It is every citizen’s duty to vote in an election (0 = "Strongly Disagree", 4 = "Strongly Agree")',
       colour = "",
       fill = "") +
  scale_colour_manual(values = c("#D85A00", "#2368B2", "grey40"),
                      aesthetics = "slab_colour") +
  scale_fill_manual(values = c("#E28745", "#5F91C7", "grey60"),
                    aesthetics = "slab_fill") +
  theme_bailey() +
  theme(legend.position = "bottom",
        legend.margin = margin(t = 0, b = 0, unit = "cm"),
        legend.title = element_blank(),
        legend.text = element_text(size = rel(1)),
        axis.title.y = element_blank(),
        axis.text.y = element_text(hjust= .5))


# Now we can render the plot as a jpeg
jpeg(
  file = here("_output", "w4_plot3.jpeg"),
  res = 300,
  units = "in",
  width = 5.97,
  height = 3.69
)
plot3
dev.off()

